package com.shujia.spark.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demo of the RDD `sample` operator: reads a text file line by line, draws a
  * random sample of the lines and writes the sample out as text.
  */
object Demo5Sample {

  /**
    * Entry point: builds a local SparkContext, samples the lines of
    * "data/students.txt" and saves the sampled lines under "data/1".
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf()
      .setAppName("sample")
      .setMaster("local")

    val sc: SparkContext = new SparkContext(conf)

    // Always release the context, even when the job fails part-way through.
    try {
      val students: RDD[String] = sc.textFile("data/students.txt")

      /**
        * sample: sampling operator
        * withReplacement: whether a sampled element is put back (may be drawn again)
        * fraction: proportion of the data to sample; approximate, not exact
        *
        */
      val sampleRDD: RDD[String] =
        students.sample(withReplacement = false, fraction = 0.1)

      // NOTE(review): Spark normally refuses to write into an output directory
      // that already exists -- confirm "data/1" is absent before re-running.
      sampleRDD.saveAsTextFile("data/1")
    } finally {
      sc.stop()
    }
  }

}
